Analysis of Fertility by Mother’s Hispanic Origin
Load the libraries
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(readr)
Get Data
# Import the tab-delimited natality export; column types are guessed by readr.
# NOTE(review): readr reports parsing issues for this file -- inspect them with
# problems(Natality_MOH) before trusting the guessed column types.
Natality_MOH <- read_delim(
  file = "Natality,MOH.txt",
  delim = "\t",
  escape_double = FALSE,
  trim_ws = TRUE
)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 5362 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (11): Notes, Mother's Hispanic Origin, Mother's Hispanic Origin Code, Mo...
## dbl (3): Year, Year Code, Births
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Look at the Data
# Print a compact overview (name, type, first values) of every imported column.
glimpse(Natality_MOH)
## Rows: 5,362
## Columns: 14
## $ Notes <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ `Mother's Hispanic Origin` <chr> "Hispanic or Latino", "Hispanic or…
## $ `Mother's Hispanic Origin Code` <chr> "2135-2", "2135-2", "2135-2", "213…
## $ `Mother's Single Race 6` <chr> "American Indian or Alaska Native"…
## $ `Mother's Single Race 6 Code` <chr> "1002-5", "1002-5", "1002-5", "100…
## $ `Census Division of Residence` <chr> "Division 1: New England", "Divisi…
## $ `Census Division of Residence Code` <chr> "CENS-D1", "CENS-D1", "CENS-D1", "…
## $ Year <dbl> 2016, 2016, 2016, 2016, 2016, 2017…
## $ `Year Code` <dbl> 2016, 2016, 2016, 2016, 2016, 2017…
## $ `Age of Mother 9` <chr> "15-19 years", "20-24 years", "25-…
## $ `Age of Mother 9 Code` <chr> "15-19", "20-24", "25-29", "30-34"…
## $ Births <dbl> 15, 39, 49, 35, 19, 17, 51, 54, 42…
## $ `Female Population` <chr> "1631", "1730", "1681", "1564", "1…
## $ `Fertility Rate` <chr> "9.20", "22.54", "29.15", "22.38",…
Rename and Select
# Keep only the eight analysis columns and give them short names in the same
# step; select() returns the columns in the order they are listed here.
data <- Natality_MOH %>%
  select(
    Origin = `Mother's Hispanic Origin`,
    Race = `Mother's Single Race 6`,
    Birth = Births,
    Pop = `Female Population`,
    Rate = `Fertility Rate`,
    Age = `Age of Mother 9 Code`,
    Year,
    Region = `Census Division of Residence Code`
  )

# Confirm the new names and the types of the retained columns.
glimpse(data)
## Rows: 5,362
## Columns: 8
## $ Origin <chr> "Hispanic or Latino", "Hispanic or Latino", "Hispanic or Latino…
## $ Race <chr> "American Indian or Alaska Native", "American Indian or Alaska …
## $ Birth <dbl> 15, 39, 49, 35, 19, 17, 51, 54, 42, 34, 23, 56, 65, 54, 47, 21,…
## $ Pop <chr> "1631", "1730", "1681", "1564", "1595", "1626", "1725", "1716",…
## $ Rate <chr> "9.20", "22.54", "29.15", "22.38", "11.91", "10.46", "29.57", "…
## $ Age <chr> "15-19", "20-24", "25-29", "30-34", "35-39", "15-19", "20-24", …
## $ Year <dbl> 2016, 2016, 2016, 2016, 2016, 2017, 2017, 2017, 2017, 2017, 201…
## $ Region <chr> "CENS-D1", "CENS-D1", "CENS-D1", "CENS-D1", "CENS-D1", "CENS-D1…
Recode
# Clean the selected columns:
#   * shorten three of the Race labels (all other labels pass through as-is);
#   * convert Pop and Rate from text to numbers -- entries that are not numeric
#     become NA and trigger an "NAs introduced by coercion" warning;
#   * divide Rate by 1000 so it is on the same scale as Birth / Pop;
#   * drop every row with a missing value and every "Not Reported" race row.
data_recode <- data %>%
  mutate(
    Race = case_when(
      Race == "American Indian or Alaska Native" ~ "AmInd",
      Race == "Asian or Pacific Islander" ~ "API",
      Race == "Black or African American" ~ "Black",
      TRUE ~ Race
    ),
    Pop = as.numeric(Pop),
    Rate = as.numeric(Rate) / 1000
  ) %>%
  drop_na() %>%
  filter(Race != "Not Reported")
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
# Check the cleaned table: Pop and Rate should now be numeric columns.
glimpse(data_recode)
## Rows: 3,721
## Columns: 8
## $ Origin <chr> "Hispanic or Latino", "Hispanic or Latino", "Hispanic or Latino…
## $ Race <chr> "AmInd", "AmInd", "AmInd", "AmInd", "AmInd", "AmInd", "AmInd", …
## $ Birth <dbl> 15, 39, 49, 35, 19, 17, 51, 54, 42, 34, 23, 56, 65, 54, 47, 21,…
## $ Pop <dbl> 1631, 1730, 1681, 1564, 1595, 1626, 1725, 1716, 1587, 1653, 172…
## $ Rate <dbl> 0.00920, 0.02254, 0.02915, 0.02238, 0.01191, 0.01046, 0.02957, …
## $ Age <chr> "15-19", "20-24", "25-29", "30-34", "35-39", "15-19", "20-24", …
## $ Year <dbl> 2016, 2016, 2016, 2016, 2016, 2017, 2017, 2017, 2017, 2017, 201…
## $ Region <chr> "CENS-D1", "CENS-D1", "CENS-D1", "CENS-D1", "CENS-D1", "CENS-D1…
Grid Plot
# Rate versus Origin for mothers aged 15-19, drawn as one panel per
# Race (rows) x Region (columns) combination. Points are semi-transparent so
# overlapping observations stay visible; x labels are rotated to fit.
ggplot(
  data = filter(data_recode, Age == "15-19"),
  mapping = aes(x = Origin, y = Rate)
) +
  geom_point(alpha = 0.4) +
  facet_grid(rows = vars(Race), cols = vars(Region)) +
  labs(title = "Plot of Origin for 15-19 age group by Race and Region") +
  theme(axis.text.x = element_text(angle = 90))

Flip the Grid
# Same 15-19 scatter of Rate versus Origin, with the facets transposed:
# Region now defines the rows and Race the columns.
ggplot(
  data = filter(data_recode, Age == "15-19"),
  mapping = aes(x = Origin, y = Rate)
) +
  geom_point(alpha = 0.4) +
  facet_grid(rows = vars(Region), cols = vars(Race)) +
  labs(
    title = "Flip Grid Plot of Origin for 15-19 age group by Race and Region"
  ) +
  theme(axis.text.x = element_text(angle = 90))

National TFR by Origin
# National total fertility rate (TFR) by Origin and Year.
#   1. Pool births and female population over Race and Region within each
#      Year x Origin x Age cell.
#   2. Age-specific rate = pooled births / pooled population.
#   3. TFR = sum of the age-specific rates x 5 (the Age codes shown in the data,
#      e.g. "15-19", are five-year bands).
# The grouping handed from one summarize() to the next is stated explicitly via
# `.groups` instead of relying on dplyr's default (and its console messages).
plot <- data_recode %>%
  group_by(Year, Origin, Age) %>%
  # Keep the Year/Origin grouping: the TFR sum below runs over Age within it.
  summarize(Birth = sum(Birth), Pop = sum(Pop), .groups = "drop_last") %>%
  mutate(Rate = Birth / Pop) %>%
  # One row per Year x Origin; return an ungrouped table for plotting.
  summarize(TFR = sum(Rate) * 5, .groups = "drop") %>%
  ggplot(aes(x = Year, y = TFR, color = Origin)) +
  geom_point()

# Convert the static ggplot into an interactive plotly widget.
ggplotly(plot)
TFR by Origin and Region
# Total fertility rate (TFR) by Origin and Year within each Region.
#   1. Pool births and female population over Race within each
#      Year x Region x Origin x Age cell.
#   2. Age-specific rate = pooled births / pooled population.
#   3. TFR = sum of the age-specific rates x 5 (the Age codes shown in the data,
#      e.g. "15-19", are five-year bands).
# The grouping handed from one summarize() to the next is stated explicitly via
# `.groups` instead of relying on dplyr's default (and its console messages).
plot1 <- data_recode %>%
  group_by(Year, Region, Origin, Age) %>%
  # Keep Year/Region/Origin grouped: the TFR sum below runs over Age within it.
  summarize(Birth = sum(Birth), Pop = sum(Pop), .groups = "drop_last") %>%
  mutate(Rate = Birth / Pop) %>%
  # One row per Year x Region x Origin; return an ungrouped table for plotting.
  summarize(TFR = sum(Rate) * 5, .groups = "drop") %>%
  ggplot(aes(x = Year, y = TFR, color = Origin)) +
  geom_point() +
  theme(axis.text.x = element_text(angle = 90)) +
  # One panel per Origin (rows) x Region (columns).
  facet_grid(Origin ~ Region)

# Convert the static ggplot into an interactive plotly widget.
ggplotly(plot1)